library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.1.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(readxl)
library(rvest)
## 
## Attaching package: 'rvest'
## The following object is masked from 'package:readr':
## 
##     guess_encoding
library(httr)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(flexdashboard)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:httr':
## 
##     config
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

Import dataset

# Read the subway crime CSV and pass its column names through
# janitor::clean_names().
raw_sub_crime <- janitor::clean_names(read_csv("./data/subwaycrime.csv"))
## New names:
## * `` -> ...1
## Warning: One or more parsing issues, see `problems()` for details
## Rows: 6244 Columns: 37
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (20): BORO_NM, CMPLNT_FR_DT, CMPLNT_TO_DT, CRM_ATPT_CPTD_CD, JURIS_DESC...
## dbl  (11): ...1, CMPLNT_NUM, ADDR_PCT_CD, JURISDICTION_CODE, KY_CD, PD_CD, T...
## lgl   (4): HADEVELOPT, HOUSING_PSA, LOC_OF_OCCUR_DESC, PARKS_NM
## time  (2): CMPLNT_FR_TM, CMPLNT_TO_TM
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read the subway station workbook and pass its column names through
# janitor::clean_names().
raw_sub_station <- janitor::clean_names(
  read_xlsx("./data/subway_info_final.xlsx")
)

Crime events vs. month

# Keep the columns used by the monthly plots, give the complaint date/time and
# offense description shorter names, and reduce each complaint date to a
# "YYYY-MM" label. Rows whose label is one of the four listed months are
# dropped.
sub_crime_freq <-
  raw_sub_crime %>%
  select(
    date = cmplnt_fr_dt,
    time = cmplnt_fr_tm,
    crime_event = ofns_desc,
    law_cat_cd,
    station_name,
    latitude,
    longitude
  ) %>%
  mutate(
    # Parse the m/d/Y text, then keep the first seven characters (year-month)
    # of its ISO representation.
    date = substring(as.character(as.Date(date, "%m/%d/%Y")), 1, 7)
  ) %>%
  filter(!(date %in% c("1971-09", "2016-11", "2018-04", "2018-05")))
  
# Number of complaints per month label, shown as a marker-only scatter plot.
plot_1 <-
  sub_crime_freq %>%
  count(date, name = "event_num") %>%
  plot_ly(x = ~date, y = ~event_num, type = "scatter", mode = "markers")

# Add the chart title and axis titles, then print the widget.
plot_1 %>%
  layout(
    title = "Crime events over time",
    xaxis = list(title = "Month"),
    yaxis = list(title = "Number of Crime Events")
  )

Top 10 most frequent crime events overall

# Bar chart of the ten offense descriptions with the most complaints.
sub_crime_freq %>%
  count(crime_event) %>%
  slice_max(n, n = 10) %>%
  # Reorder after slicing so the factor only carries the levels that are
  # actually plotted; bars are ordered by their count.
  mutate(crime_event = fct_reorder(crime_event, n)) %>%
  plot_ly(
    x = ~crime_event,
    y = ~n,
    color = ~crime_event,
    type = "bar",
    colors = "viridis"
  ) %>%
  layout(
    # The chart title belongs in `title`; the y axis shows the complaint count.
    title = "Top 10 most frequent crime events",
    xaxis = list(title = "Crime Events"),
    yaxis = list(title = "Number of Crime Events")
  )

How the top 5 most frequent crime events change over time

# Complaint count per offense description, most frequent first.
most_freq_event <-
  sub_crime_freq %>%
  count(crime_event, name = "event_num", sort = TRUE)

# Monthly complaint counts for each offense level (law_cat_cd), drawn as
# points joined by one line per level.
plot_2 <-
  sub_crime_freq %>%
  # count() returns an ungrouped table, so no grouped-output message is
  # emitted and no grouping leaks into later steps.
  count(law_cat_cd, date, name = "event_num") %>%
  ggplot(
    aes(
      x = date,
      y = event_num,
      color = law_cat_cd,
      # `date` is a character month label, i.e. a discrete x. Without an
      # explicit group every (level, month) pair forms its own one-point
      # group and geom_line() has nothing to connect.
      group = law_cat_cd
    )
  ) +
  geom_point() +
  geom_line() +
  theme(legend.text = element_text(size = 7))
## `summarise()` has grouped output by 'law_cat_cd'. You can override using the `.groups` argument.
  # Convert the ggplot to a plotly widget and set its titles. The widget is
  # already supplied by the pipe, so layout() receives only the named layout
  # attributes (the ggplot object must not be passed again).
  ggplotly(plot_2) %>%
  layout(
    title = "3 Degrees of Crime Events Numbers Over Time",
    xaxis = list(title = "Month"),
    yaxis = list(title = "Number of Crime Events")
  )

Crime events vs. time of day

# Complaints for the listed offense types, each tagged with a coarse
# time-of-day label derived from the complaint start time.
sub_crime_time <-
  raw_sub_crime %>%
  select(
    start_date = cmplnt_fr_dt,
    start_time = cmplnt_fr_tm,
    crime_event = ofns_desc,
    station_name,
    latitude,
    longitude
  ) %>%
  mutate(
    # Each two-hour window is assigned to a four-hour mark: 00:00-02:00 -> 00,
    # 02:00-06:00 -> 04, 06:00-10:00 -> 08, 10:00-14:00 -> 12,
    # 14:00-18:00 -> 16 and everything from 18:00 onwards -> 20.
    # Rows with a missing start_time match no branch and stay NA.
    event_time = as.character(case_when(
      hms("00:00:00") <= start_time & start_time < hms("02:00:00") ~ hms("00:00:00"),
      hms("02:00:00") <= start_time & start_time < hms("04:00:00") ~ hms("04:00:00"),
      hms("04:00:00") <= start_time & start_time < hms("06:00:00") ~ hms("04:00:00"),
      hms("06:00:00") <= start_time & start_time < hms("08:00:00") ~ hms("08:00:00"),
      hms("08:00:00") <= start_time & start_time < hms("10:00:00") ~ hms("08:00:00"),
      hms("10:00:00") <= start_time & start_time < hms("12:00:00") ~ hms("12:00:00"),
      hms("12:00:00") <= start_time & start_time < hms("14:00:00") ~ hms("12:00:00"),
      hms("14:00:00") <= start_time & start_time < hms("16:00:00") ~ hms("16:00:00"),
      hms("16:00:00") <= start_time & start_time < hms("18:00:00") ~ hms("16:00:00"),
      hms("18:00:00") <= start_time & start_time < hms("20:00:00") ~ hms("20:00:00"),
      # No upper bound on the last bin: an upper bound of `< 23:59:59` would
      # leave a start time of exactly 23:59:59 unmatched (NA).
      hms("20:00:00") <= start_time ~ hms("20:00:00")
    ))
  ) %>%
  filter(
    crime_event %in% c(
      "CRIMINAL MISCHIEF & RELATED OF",
      "ASSAULT 3 & RELATED OFFENSES",
      "HARRASSMENT 2",
      "GRAND LARCENY",
      "DANGEROUS DRUGS",
      "FELONY ASSAULT",
      "ROBBERY",
      "PETIT LARCENY",
      "FORGERY",
      "SEX CRIMES",
      "OFF. AGNST PUB ORD SENSBLTY &",
      "DANGEROUS WEAPONS"
    )
  )

# Stacked bar chart of complaint counts per time-of-day label, filled by
# offense type, with the labels ordered from most to least frequent.
plot_3 <-
  sub_crime_time %>%
  mutate(event_time = as.factor(event_time)) %>%
  ggplot(aes(x = fct_infreq(event_time), fill = crime_event)) +
  # geom_bar() counts rows per x value by default; it replaces
  # geom_histogram(stat = "count", height = 2), which only produced an
  # "Ignoring unknown parameters" warning for the same result.
  geom_bar(width = 0.9) +
  labs(
    title = "Frequency of crime events v.s. Time points",
    x = "Occurrence time",
    y = "Frequency of crime events"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 1),
    legend.position = "bottom",
    legend.text = element_text(size = 8)
  ) +
  # The legend belongs to the `fill` aesthetic (no colour aesthetic is
  # mapped), so the two-row layout has to be requested for `fill`.
  guides(fill = guide_legend(nrow = 2))
## Warning: Ignoring unknown parameters: binwidth, bins, pad, height
# Convert plot_3 to a plotly widget and lay its legend out horizontally,
# anchored by its top centre at (0.3, -0.3) in plot coordinates.
layout(
  ggplotly(plot_3),
  legend = list(
    orientation = "h",
    xanchor = "center",
    yanchor = "top",
    x = 0.3,
    y = -0.3
  )
)

Response time

# One row per complaint of the listed offense types that has both a start and
# an end time. Adds parsed start/end timestamps, the elapsed time between them
# in hours (`response_time`), a "YYYY-MM" month label (`date`) and a coarse
# time-of-day label (`event_time`).
crime_response_time <-
  raw_sub_crime %>%
  select(
    start_date = cmplnt_fr_dt,
    start_time = cmplnt_fr_tm,
    end_date = cmplnt_to_dt,
    end_time = cmplnt_to_tm,
    crime_event = ofns_desc,
    law_cat_cd
  ) %>%
  drop_na(start_time, end_time) %>%
  filter(
    crime_event %in% c(
      "CRIMINAL MISCHIEF & RELATED OF",
      "ASSAULT 3 & RELATED OFFENSES",
      "HARRASSMENT 2",
      "GRAND LARCENY",
      "DANGEROUS DRUGS",
      "FELONY ASSAULT",
      "ROBBERY",
      "PETIT LARCENY",
      "FORGERY",
      "SEX CRIMES",
      "OFF. AGNST PUB ORD SENSBLTY &",
      "DANGEROUS WEAPONS"
    )
  ) %>%
  mutate(
    # Normalise the m/d/Y text dates to ISO "YYYY-MM-DD" strings.
    start_date = as.character(as.Date(start_date, "%m/%d/%Y")),
    end_date = as.character(as.Date(end_date, "%m/%d/%Y")),
    # Combine date and time-of-day into full timestamps.
    start = as.POSIXct(
      paste(start_date, start_time),
      format = "%Y-%m-%d %H:%M:%S"
    ),
    end = as.POSIXct(
      paste(end_date, end_time),
      format = "%Y-%m-%d %H:%M:%S"
    ),
    response_time = difftime(end, start, units = "hours"),
    date = substring(start_date, 1, 7)
  ) %>%
  # The exclusions are "YYYY-MM" labels, so they must be compared with the
  # month label `date`; the full "YYYY-MM-DD" `start_date` can never equal
  # them and would leave the filter without effect.
  filter(!(date %in% c("1971-09", "2016-11", "2018-04", "2018-05"))) %>%
  mutate(
    # Each two-hour window is assigned to a four-hour mark: 00:00-02:00 -> 00,
    # 02:00-06:00 -> 04, 06:00-10:00 -> 08, 10:00-14:00 -> 12,
    # 14:00-18:00 -> 16 and everything from 18:00 onwards -> 20.
    event_time = as.character(case_when(
      hms("00:00:00") <= start_time & start_time < hms("02:00:00") ~ hms("00:00:00"),
      hms("02:00:00") <= start_time & start_time < hms("04:00:00") ~ hms("04:00:00"),
      hms("04:00:00") <= start_time & start_time < hms("06:00:00") ~ hms("04:00:00"),
      hms("06:00:00") <= start_time & start_time < hms("08:00:00") ~ hms("08:00:00"),
      hms("08:00:00") <= start_time & start_time < hms("10:00:00") ~ hms("08:00:00"),
      hms("10:00:00") <= start_time & start_time < hms("12:00:00") ~ hms("12:00:00"),
      hms("12:00:00") <= start_time & start_time < hms("14:00:00") ~ hms("12:00:00"),
      hms("14:00:00") <= start_time & start_time < hms("16:00:00") ~ hms("16:00:00"),
      hms("16:00:00") <= start_time & start_time < hms("18:00:00") ~ hms("16:00:00"),
      hms("18:00:00") <= start_time & start_time < hms("20:00:00") ~ hms("20:00:00"),
      # No upper bound on the last bin: an upper bound of `< 23:59:59` would
      # leave a start time of exactly 23:59:59 unmatched (NA).
      hms("20:00:00") <= start_time ~ hms("20:00:00")
    ))
  )

# Box plot of response_time per offense type, with the offense factor
# reordered by response_time (fct_reorder's default summary).
plot_ly(
  mutate(
    crime_response_time,
    crime_event = fct_reorder(crime_event, response_time)
  ),
  y = ~response_time,
  color = ~crime_event,
  type = "box",
  colors = "viridis"
)